package data_deepprocessing.algorithm.word_embedding.service;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.log4j.Logger;
import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
import org.deeplearning4j.models.word2vec.Word2Vec;
import org.deeplearning4j.text.sentenceiterator.BasicLineIterator;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;

import data_deepprocessing.util.FilePathUtil;
import data_deepprocessing.util.FileUtil;


/**
 * @author YUHU YUAN
 * @date Created: 2017-03-31 13:30:14
 * @version 1.0
 */

public class YYH_Word2VectorService {

	private static final Logger log = Logger.getLogger(YYH_Word2VectorService.class);

	/**
	 * Trains a Word2Vec model on the corpus at
	 * {@link FilePathUtil#SEGMENTED_XIANBINGSHI_PATH} and persists the result.
	 *
	 * <p>The corpus is read one line at a time. After fitting, the word vectors
	 * are written to {@link FilePathUtil#WORD_EMBEDDING_VECTOR_PATH} and the full
	 * model to {@link FilePathUtil#WORD_EMBEDDING_MODEL_PATH}.
	 *
	 * @return the fitted model
	 * @throws IOException declared for callers; raised by the corpus/serialization
	 *         calls when file access fails
	 */
	public static Word2Vec getWord2Vec() throws IOException {
		log.info("Load & Vectorize Sentences....");
		// One sentence per line of the corpus file.
		SentenceIterator iter = new BasicLineIterator(FilePathUtil.SEGMENTED_XIANBINGSHI_PATH);

		log.info("Building model....");
		// No tokenizer factory is configured here, so the builder's default applies.
		Word2Vec word2Vec = new Word2Vec.Builder()
				.minWordFrequency(5)
				.iterations(10)
				.layerSize(200)
				.seed(42)
				.windowSize(5)
				.iterate(iter)
				.build();

		log.info("Fitting Word2Vec model....");
		word2Vec.fit();

		log.info("Writing word vectors to text file....");
		// Persist both the plain vectors and the full model.
		WordVectorSerializer.writeWordVectors(word2Vec, FilePathUtil.WORD_EMBEDDING_VECTOR_PATH);
		WordVectorSerializer.writeFullModel(word2Vec, FilePathUtil.WORD_EMBEDDING_MODEL_PATH);

		return word2Vec;
	}

	/**
	 * Loads the model stored at {@link FilePathUtil#WORD_EMBEDDING_MODEL_PATH} and
	 * prints the 100 nearest words to a fixed probe word on standard output, as a
	 * manual sanity check of the trained embeddings.
	 */
	public static void testModelEffect(){
		Word2Vec word2Vec = WordVectorSerializer.readWord2VecModel(FilePathUtil.WORD_EMBEDDING_MODEL_PATH);
		String name = "口干";
		log.info("Closest Words:");
		System.out.println(name + ">>>>>>");
		Collection<String> lst = word2Vec.wordsNearest(name, 100);
		System.out.println(lst);
	}

	/**
	 * Collects the words listed in the vector file at
	 * {@link FilePathUtil#WORD_EMBEDDING_VECTOR_PATH}. Used when evaluating accuracy.
	 *
	 * <p>The first line of the file is skipped. For every following line, the text
	 * before the first space is taken as the word. Lines that contain no space are
	 * ignored instead of raising {@link StringIndexOutOfBoundsException}.
	 *
	 * @author YUHU YUAN
	 * @date Created: 2017-04-05 21:37:28
	 * @return a map whose keys are the words found; every value is the empty
	 *         string, so the map effectively serves as a set
	 * @throws IOException if the vector file cannot be read
	 */
	public static Map<String,String> getWordVec_Word() throws IOException{
		Map<String,String> map = new HashMap<>();
		// try-with-resources guarantees the reader is closed, including on failure.
		try (BufferedReader reader = FileUtil.getReader(FilePathUtil.WORD_EMBEDDING_VECTOR_PATH)) {
			// The first line is discarded, as before.
			// NOTE(review): presumably a header line written by the serializer - confirm.
			reader.readLine();

			String line;
			while ((line = reader.readLine()) != null) {
				int separator = line.indexOf(' ');
				if (separator < 0) {
					// Blank or malformed line: no "word vector..." structure to split.
					continue;
				}
				map.put(line.substring(0, separator), "");
			}
		}
		return map;
	}

	/**
	 * Entry point: trains the model and writes it to disk. Call
	 * {@link #testModelEffect()} instead to inspect a previously saved model.
	 *
	 * @param args unused
	 * @throws Exception if training or serialization fails
	 */
	public static void main(String[] args) throws Exception {
		getWord2Vec();
	}

}







